Boston Crime

Data Overview

In [5]:
path = "crime.csv"
df = pd.read_csv(path, encoding='latin-1')
df.head()
Out[5]:
INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION DISTRICT REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK HOUR UCR_PART STREET Lat Long Location
0 I182070945 619 Larceny LARCENY ALL OTHERS D14 808 NaN 2018-09-02 13:00:00 2018 9 Sunday 13 Part One LINCOLN ST 42.357791 -71.139371 (42.35779134, -71.13937053)
1 I182070943 1402 Vandalism VANDALISM C11 347 NaN 2018-08-21 00:00:00 2018 8 Tuesday 0 Part Two HECLA ST 42.306821 -71.060300 (42.30682138, -71.06030035)
2 I182070941 3410 Towed TOWED MOTOR VEHICLE D4 151 NaN 2018-09-03 19:27:00 2018 9 Monday 19 Part Three CAZENOVE ST 42.346589 -71.072429 (42.34658879, -71.07242943)
3 I182070940 3114 Investigate Property INVESTIGATE PROPERTY D4 272 NaN 2018-09-03 21:16:00 2018 9 Monday 21 Part Three NEWCOMB ST 42.334182 -71.078664 (42.33418175, -71.07866441)
4 I182070938 3114 Investigate Property INVESTIGATE PROPERTY B3 421 NaN 2018-09-03 21:05:00 2018 9 Monday 21 Part Three DELHI ST 42.275365 -71.090361 (42.27536542, -71.09036101)

Report

In [6]:
df.profile_report()
Out[6]:

In [9]:
plt.figure(figsize=(16,10))
sns.distplot(np.log(df.OFFENSE_CODE), kde=True, rug=True)
plt.show
Out[9]:
<function matplotlib.pyplot.show(*args, **kw)>

Map in Boston

In [12]:
locs = list(zip(df.Lat.dropna(), df.Long.dropna()))


m = folium.Map(location=['42.3600825', '-71.0588801'])
for lat, lng in locs[:1000]:
    m.add_children(folium.Marker((lat, lng)))
m
Out[12]:
In [13]:
locs = list(zip(df.Lat.dropna(), df.Long.dropna()))
print(type(locs))
heat_data = [[lat, long] for lat, long in locs[:30000]]
map_ = folium.Map(location=['42.3600825', '-71.0588801'], zoom_start=11)
hm = plugins.HeatMap(heat_data,auto_play=True,max_opacity=0.4)
hm.add_to(map_)
map_
<class 'list'>
Out[13]:
In [16]:
data = df[df.SHOOTING == 1][["Lat", "Long"]].values

#locs = list(zip(df.Lat.dropna(), df.Long.dropna()))
heat_data = [[lat, long] for lat, long in data[:20]]
map_ = folium.Map(location=['42.3600825', '-71.0588801'], zoom_start=11)
hm = plugins.HeatMap(heat_data,auto_play=True,max_opacity=0.4)
hm.add_to(map_)
map_
Out[16]:

Crime per District

In [17]:
plt.figure(figsize=(16,10))
sns.scatterplot(x='Lat', y='Long', hue='DISTRICT', alpha=0.01, data=df[df['Lat'] > 1])
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
Out[17]:
<matplotlib.legend.Legend at 0x13dc32d30>
In [18]:
crime_per_district=df.DISTRICT.value_counts()

plt.figure(figsize=(10,10))
plt.pie(crime_per_district.values, labels=crime_per_district.index, autopct='%1.1f%%', startangle=90, colors=sns.color_palette("husl", 13))
plt.title('Repartition of crimes by Districts')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2)
plt.show()

Type Of Crime

In [20]:
sns.catplot(y="OFFENSE_CODE_GROUP", kind='count', height=8, aspect=2, order=df.OFFENSE_CODE_GROUP.value_counts().index, data=df)
plt.xlabel('OFFENSE CODE ', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.title("Number of crime per code", fontsize=20)
Out[20]:
Text(0.5, 1.0, 'Number of crime per code')
In [21]:
sns.catplot( x="YEAR", kind='count', data=df, aspect=2)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.title("Number of crime per Year", fontsize=20)
plt.show()
In [23]:
sns.catplot( x="Day", kind='count',height=7, aspect=3, data=df, order=col_order)
plt.xlabel('Day', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.title("Number of crime per day", fontsize=20)
plt.show()
In [24]:
sns.catplot( x="HOUR", kind='count', height=8.27, aspect=3, data=df)
plt.xlabel('Hour', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.title("Number of crime per Hour", fontsize=20)
plt.show()
In [25]:
sns.catplot( x="MONTH", kind='count', data=df, aspect=2)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.title("Number of crime per months", fontsize=20)
plt.show()
In [ ]:
plt.figure(figsize=(16,10))
data= np.array(df[["INCIDENT_NUMBER", "OFFENSE_CODE", "YEAR", "MONTH", "Location"]])
print(type(data))
sns.distplot(data)
plt.show()